import os
import shutil

import pandas
import random
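# Generates the attack dataset. Described as containing 500 genuine samples and
# 1500 fake samples. NOTE: the selection in gen_attack_dataset currently takes
# 0 genuine and 1000 fake samples, so this description may be outdated.
# Outputs use the more specific names "attackdataset" and "attackdataset_label".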
def gen_attack_dataset(
    train_label=r"N:\csy\ASVNormClip40000Dataset\train_label.txt",
    new_train_label=r"N:\csy\ASVNormClip40000Dataset\attackdataset1_label.txt",
    old_dataset=r"N:\csy\ASVNormClip40000Dataset\train",
    new_dataset=r"N:\csy\ASVNormClip40000Dataset\attackdataset1",
    num_genuine=0,
    num_spoof=1000,
):
    """Copy a labelled subset of the training set into a new attack dataset.

    The source label file is read as space-separated text without a header:
    column 0 is the file name (relative to ``old_dataset``) and column 1 is
    the label, either ``genuine`` or ``fake``. The first ``num_genuine``
    genuine entries and the first ``num_spoof`` fake entries (in label-file
    order) are selected. Each selected file is copied into ``new_dataset``
    and a ``"<file> <label>"`` line is written to ``new_train_label``.
    Genuine entries are written first, followed by the fake entries in a
    random order.

    Called without arguments, the function behaves as before: it uses the
    built-in dataset paths and selects 0 genuine and 1000 fake samples.

    Args:
        train_label: Path of the source label file.
        new_train_label: Path of the label file to write (overwritten).
        old_dataset: Directory containing the source files.
        new_dataset: Directory to copy selected files into; created if it
            does not exist.
        num_genuine: Maximum number of genuine samples to select.
        num_spoof: Maximum number of fake samples to select.

    Raises:
        ValueError: If ``num_genuine`` or ``num_spoof`` is negative.
    """
    if num_genuine < 0 or num_spoof < 0:
        # A negative slice bound would silently select "all but the last N".
        raise ValueError("num_genuine and num_spoof must be non-negative")

    # dtype=str keeps purely numeric file names as text so they can be joined
    # into paths and written back unchanged.
    labels = pandas.read_csv(train_label, sep=' ', header=None, dtype=str)
    genuine_files = labels[labels[1] == 'genuine'][0].tolist()[:num_genuine]
    spoof_files = labels[labels[1] == 'fake'][0].tolist()[:num_spoof]

    # Only the order of the already-selected fake files is randomised; the
    # selection itself is always the first ``num_spoof`` entries.
    random.shuffle(spoof_files)

    selected = [(name, 'genuine') for name in genuine_files]
    selected.extend((name, 'fake') for name in spoof_files)

    # shutil.copy does not create the destination directory.
    os.makedirs(new_dataset, exist_ok=True)

    # pandas reads the label file as UTF-8 by default; write the same encoding.
    with open(new_train_label, "w", encoding="utf-8") as label_file:
        for name, label in selected:
            shutil.copy(
                os.path.join(old_dataset, name),
                os.path.join(new_dataset, name),
            )
            label_file.write(name + " " + label + "\n")


# Script entry point: build the attack dataset only when run directly,
# not when this module is imported.
if __name__ == "__main__":
    gen_attack_dataset()

